**** Share by region and population of urban centres. Downloaded from: https://data.humdata.org/dataset/repartition-desagregee-de-la-population-en-2013
	*** The most recent dataset is from 2013. That is acceptable for estimating the share living in regional capitals, but the share of population by region is better taken from more recent ANSD data (source reference to be added)

* Build population totals and shares by region and location type from the 2013
* census workbook, then save them for merging into the survey data.
* location2 codes assigned below: 1 = listed regional capital, 2 = other rows
* with an urban count, 3 = rows with a rural count.
* The path uses forward slashes (like every other path in this file) so the
* import also works outside Windows.
import excel "$root/input/bd-analytic-rgphae2013.xlsx", sheet("CACR") cellrange(A1:F553) firstrow clear

* The population columns must be numeric for the missing-value tests below
destring Urbain Rural, replace

* Flag the capital communes; labels are compared exactly as typed.
* NOTE(review): "COM .KEDOUGOU" and "COM .MATAM" have the space before the dot;
* presumably this mirrors the spelling in the workbook - confirm.
gen location2=.
foreach capital in "VILLE DE DAKAR" "COM. DIOURBEL" "COM. FATICK" "COM. KAFFRINE" ///
	"COM. KAOLACK" "COM .KEDOUGOU" "COM. KOLDA" "COM. LOUGA" "COM .MATAM" ///
	"COM. SAINT LOUIS" "COM. SEDHIOU" "COM. TAMBACOUNDA" "VILLE DE THIES" ///
	"COM. ZIGUINCHOR" {
	replace location2=1 if COMMUNEARRONDISSEMENTVILLE=="`capital'"
}
replace location2=2 if Urbain!=. & location2==.
* NOTE(review): any row with a rural count is coded 3, even if it was flagged
* 1 or 2 above - confirm that no row carries both an urban and a rural count.
replace location2=3 if Rural!=.

* Population per row, then totals per region x location type and per region
egen pop=rowtotal(Urbain Rural)
collapse (sum) pop, by(REGION location2)
egen pop_reg=total(pop), by(REGION)
ren REGION nam_reg2
replace nam_reg2=proper(nam_reg2)
gen shar_loc=pop/pop_reg

* Numeric region code mi01 (merge key): position of the region name in this list.
* Names that match none of the entries keep mi01 missing.
gen mi01=.
local code=0
foreach region in "Dakar" "Ziguinchor" "Diourbel" "Saint-Louis" "Tambacounda" "Kaolack" "Thies" ///
	"Louga" "Fatick" "Kolda" "Matam" "Kaffrine" "Kedougou" "Sedhiou" {
	local ++code
	replace mi01=`code' if nam_reg2=="`region'"
}

save "$root/dat/loc_senegal.dta", replace

*** Variables for analysis
* Loads the anonymised survey file and derives the analysis variables:
* previous region, household composition, location type, migration flags,
* food-security indicators, rice prices, income and poverty status.

use "$root/dat/SN_2020_Covid19_ano.dta", clear
	
		* Create region where respondent was living a month ago for post-stratification
		gen old_reg=mi04
		replace old_reg=mi01 if mi04==15 | old_reg==. // for people that used to live abroad (mi04==15) or with no previous region recorded, use current place of residence
		
		* Household composition
		replace ed01=1 if ed01==0 // to be checked
		replace ed03=0 if ed02==0
		gen nb_adult=ed01-ed03
		
		* Dakar indicators for the current (mi01) and previous (old_reg) region
		gen dakar=(mi01==1)
		gen dakar2=(old_reg==1)
		label define dakar 0 "Rest of Senegal" 1 "Dakar"
		label value dakar dakar
		
		* Current location type from mi02 (codes 1-14 -> 1, 15 -> 2, 16 -> 3)
		gen location=1 if mi02<=14
		replace location=2 if mi02==15
		replace location=3 if mi02==16
		label define location 1 "Large town" 2 "Small town" 3 "Village"
		label val location location
		
		* Migration flags. Both require a non-missing previous region: in Stata a
		* missing value satisfies mi04!=1, so without the guard every Dakar
		* respondent with missing mi04 would be flagged as having moved to Dakar.
		gen mig_reg=(mi01!=mi04 & mi04!=.)
		gen mov_dakar=(mi01==1 & mi04!=1 & mi04!=.)
		
		* Location type from mi05, falling back on mi02 when mi05 is missing
		replace mi05=mi02 if mi05==.
		gen location2=1 if mi05<=14
		replace location2=2 if mi05==15
		replace location2=3 if mi05==16
		
		* Education with ls04 category 4 folded into category 3
		gen educ=ls04
		replace educ=3 if ls04==4
		
		replace mg10=100 if mg10>100 & mg10!=. // cap number of daily contacts at 100
		
		gen reduce_meal_size=(fs011>0 & fs011!=.)
		gen reduce_nb_meal=(fs012>0 & fs012!=.)
		
		* Rice price ratios (fs06x divided by fs05x) for the three rice types
		gen p_rice_perf=fs06a/ fs05a
		gen p_rice_non_perf=fs06b/ fs05b
		gen p_rice_local=fs06c/ fs05c

		* Household income category: 0 when nc02 is missing although nc01 is not, top-coded at 5
		gen miss_hh_income=(nc02==. & nc01!=.)
		gen hh_inc=nc02
		replace hh_inc=0 if miss_hh_income==1 // full variable name instead of the abbreviation miss_hh
		replace hh_inc=5 if hh_inc>5 & hh_inc!=.
		
		gen french=(ls01==2)
		gen village2=(location2==3)
			
		***salary midrange
		* One midpoint per nc02 code (presumably income brackets - confirm against the questionnaire)
		gen salary_midrange=100000/2 if nc02==1
		replace salary_midrange=(100000+200000)/2 if nc02==2
		replace salary_midrange=(200000+300000)/2 if nc02==3
		replace salary_midrange=(300000+500000)/2 if nc02==4
		replace salary_midrange=(500001+1000000)/2 if nc02==5
		replace salary_midrange=(1000001+2000000)/2 if nc02==6
		replace salary_midrange=(2000001+5000000)/2 if nc02==7
		replace salary_midrange=(5000001+10000000)/2 if nc02==8
		replace salary_midrange=0 if nc02==0
		***adult equivalent: (A+aC)^teta with a=0.3 and teta=0.9
		gen adult=ed01-ed03
		gen adultequivalent=(adult+0.3*ed03)^0.9
		***household salary
		gen hh_salary=salary_midrange/adultequivalent
		***poverty line (Conversion factor Senegal LCU/$ PPP=223.60)
		* Thresholds equal 1.90, 3.20 and 5.50 times 223.60. The division by 31
		* presumably converts a monthly amount to a daily one - confirm.
		* Each indicator stays missing when hh_salary is missing.
		gen pov_1_90=(hh_salary/31<=424.84) if hh_salary!=.
		
		gen pov_3_20=(hh_salary/31<=715.52) if hh_salary!=.
		
		gen pov_5_50=(hh_salary/31<=1229.8) if hh_salary!=.
		
	
		*** Merge location data
		* Attach the census population figures by region (mi01) and location type (location2).
		* keep() retains respondents and matched rows only, so region/location cells that
		* exist only in the census file are not appended as rows without survey data.
		merge m:1 mi01 location2 using "$root/dat/loc_senegal.dta", nogen update keep(master match match_update match_conflict)
		
			*** Sampling weights
			
					*** Region share (from census, ref: ANSD)
		* Both lists are in region-code order: 1 Dakar, 2 Ziguinchor, 3 Diourbel,
		* 4 Saint-Louis, 5 Tambacounda, 6 Kaolack, 7 Thiès, 8 Louga, 9 Fatick,
		* 10 Kolda, 11 Matam, 12 Kaffrine, 13 Kédougou, 14 Sédhiou
		local pop_shares 23.1 4.1 11.1 6.6 5.2 7.1 13 6.4 5.3 4.9 4.3 4.3 1.1 3.4
		local fem_shares 0.505711 0.487527 0.546984 0.511718 0.503089 0.524018 0.508737 0.521481 0.518663 0.502456 0.528048 0.506995 0.473215 0.503877
		
		* Population share of the current region (mi01); filled as text, then converted
		gen region_share=""
		forvalues code=1/14 {
			local val : word `code' of `pop_shares'
			replace region_share="`val'" if mi01==`code'
		}
		destring region_share, replace
		
		* Same population shares, looked up by the previous region (old_reg)
		gen old_region_share=""
		forvalues code=1/14 {
			local val : word `code' of `pop_shares'
			replace old_region_share="`val'" if old_reg==`code'
		}
		destring old_region_share, replace
		
		* Female share of the current region
		gen fem_share=""
		forvalues code=1/14 {
			local val : word `code' of `fem_shares'
			replace fem_share="`val'" if mi01==`code'
		}
		destring fem_share, replace
		
		* Region x sex share: female share applies when ls02==1, its complement when ls02==0
		gen reg_sex_share=cond(ls02==1, fem_share, 1-fem_share)*region_share if inlist(ls02,0,1)
		egen strata_reg_sex=group(mi01 ls02)
		
			*** nbre_tel (number of phones) is missing for 151 observations; it has to be imputed before it can enter the sampling weights
			summarize nbre_tel, detail
			gen ln_phone=ln(nbre_tel)
			summarize ln_phone, detail
			
			*** The phone count is right-skewed and log-normal, so the imputation is done on the log scale
			local phone_covars i.educ nb_adult ed03 i.old_reg
			xi: regress ln_phone `phone_covars', robust

			* Impute the log count from the same covariates, then return to the level scale
			xi: impute ln_phone `phone_covars', gen(nbre_tel2)
			replace nbre_tel2=exp(nbre_tel2)
			
			* Weights: group size divided by the number of phones in the household
			* (a household's chance of being contacted is taken as proportional to its phones).
			* First set uses the imputed phone count (nbre_tel2)
			foreach pair in "hh ed01" "child ed03" "adult nb_adult" {
				gettoken who size : pair
				gen weight_`who'=`size'/nbre_tel2
			}
			
			* Second set (suffix 2) uses the reported phone count (nbre_tel)
			foreach pair in "hh ed01" "child ed03" "adult nb_adult" {
				gettoken who size : pair
				gen weight_`who'2=`size'/nbre_tel
			}
			
			* Share of each region in the non-pilot consenting sample, in percent
			egen total_consent=total(consent) if pilot==0
			tempvar consent_reg
			egen `consent_reg'=total(consent) if pilot==0, by(mi01)
			gen shar_reg_sample=`consent_reg'/total_consent*100 if pilot==0
			drop `consent_reg'
			
			* Household weight rescaled by census region share over sample region share
			gen weight_reg_hh=weight_hh*region_share/shar_reg_sample
			
			*** Group region for post-stratification
			* Regions are pooled into six groups, once for the current region (mi01)
			* and once for the previous region (old_reg), using the same mapping.
			
			gen reg_group=1 if mi01==1
			replace reg_group=2 if inlist(mi01,2,5,10,13,14)
			replace reg_group=3 if mi01==3
			replace reg_group=4 if inlist(mi01,4,8,11)
			replace reg_group=5 if inlist(mi01,6,9,12)
			replace reg_group=6 if mi01==7
			
			gen old_reg_group=1 if old_reg==1
			replace old_reg_group=2 if inlist(old_reg,2,5,10,13,14)
			replace old_reg_group=3 if old_reg==3
			replace old_reg_group=4 if inlist(old_reg,4,8,11)
			replace old_reg_group=5 if inlist(old_reg,6,9,12)
			replace old_reg_group=6 if old_reg==7

			label define reg_group 1 "Dakar" 2 "South" 3 "Diourbel" 4 "North" 5 "Centre" 6 "Thies"
			label value reg_group reg_group
			label value old_reg_group reg_group
			
			*** Create strata combining regions grouped and female share
			* One tagged observation per region, so each region's share is counted once per group
			egen tag=tag(mi01) if region_share!=.
			egen xx=total(region_share) if tag==1, by(reg_group)
			egen region_share2=max(xx), by(reg_group)
			drop xx
			
			* Population-weighted female share within each region group.
			* by(reg_group) is required: without it wtmean() returns one pooled mean
			* over all regions and every group receives the same value.
			egen xx=wtmean(fem_share) if tag==1, by(reg_group) weight(region_share)
			egen fem_share2=max(xx), by(reg_group)
			gen reg_sex_share2=fem_share2*region_share2 if ls02==1
			replace reg_sex_share2=(1-fem_share2)*region_share2 if ls02==0
			egen strata_reg_sex2=group(reg_group ls02)
			drop xx tag
			
			*** Same strata for the previous region
			* The tagged observation of a previous region may currently live elsewhere,
			* so region_share and fem_share (both keyed on mi01) cannot be used here.
			* Population shares come from old_region_share. The female share of the
			* previous region is looked up from fem_share, which is constant within mi01;
			* it stays missing for a region with no current resident in the data.
			tempvar old_fem
			gen double `old_fem'=.
			forvalues code=1/14 {
				quietly summarize fem_share if mi01==`code', meanonly
				if r(N)>0 {
					quietly replace `old_fem'=r(max) if old_reg==`code'
				}
			}
			
			egen tag=tag(old_reg) if old_region_share!=.
			egen xx=total(old_region_share) if tag==1, by(old_reg_group)
			egen old_region_share2=max(xx), by(old_reg_group)
			drop xx
			
			egen xx=wtmean(`old_fem') if tag==1, by(old_reg_group) weight(old_region_share)
			egen old_fem_share2=max(xx), by(old_reg_group)
			drop xx `old_fem'
			gen old_reg_sex_share2=old_fem_share2*old_region_share2 if ls02==1
			replace old_reg_sex_share2=(1-old_fem_share2)*old_region_share2 if ls02==0
			egen old_strata_reg_sex2=group(old_reg_group ls02)
			
			* Age groups from ls03: under 30, 30-49, 50 and over.
			* Missing ages are excluded explicitly because Stata treats missing as larger than any number.
			gen age_group=1 if ls03<30
			replace age_group=2 if ls03>=30 & ls03<50
			replace age_group=3 if ls03>=50 & ls03!=.
			
			*** Stratification migration
			* Each share is a sex proportion (0.486 for ls02==0, 0.514 for ls02==1)
			* multiplied by an education proportion within that sex
			gen share_mig=.
			replace share_mig=0.486*0.496 if ls02==0 & educ==1
			replace share_mig=0.486*0.192 if ls02==0 & educ==2
			replace share_mig=0.486*0.312 if ls02==0 & educ==3
			replace share_mig=0.514*0.6 if ls02==1 & educ==1
			replace share_mig=0.514*0.182 if ls02==1 & educ==2
			replace share_mig=0.514*0.218 if ls02==1 & educ==3
			
			egen strat_mig=group(ls02 educ) 


			

	save "$root/dat/SN_2020_Covid19.dta", replace